library(lme4)
require(ggplot2)
require(GGally)
require(reshape2)
require(compiler)
require(parallel)
require(boot)
library(visreg)
library(foreign)
library(stargazer)

#######################################
############# BEEPS SURVEY ############
#######################################

d1<-read.dta("Poland-2005--full data-5999.dta")
gm<-read.csv("PSU_data.csv")

dat<-merge(d1, gm[!is.na(gm$divMig),],by="citowvil", all.x=TRUE)

dat$Resettled[is.na(dat$Resettled)]<-0  

table(dat$citowvil[dat$Resettled==1])

table(dat$q5a)
dat$privatisation<-ifelse(dat$q5a==1, 1, 0)
table(dat$privatisation)
dat$founded[dat$q5a==1]<-"privatized"
dat$founded[dat$q5a==2]<-"from scratch"
dat$founded[dat$q5a>2]<-"other"
dat$founded<-factor(dat$founded, levels=c("other", "privatized", "from scratch"))
dat$type[dat$s2a==1]<-"Single proprietorship"
dat$type[dat$s2a==2]<-"Partnership"
dat$type[dat$s2a==4 | dat$s2a==5]<-"Corporation"
dat$type[dat$s2a==6 | dat$s2a==3]<-"Coop or Other"
dat$type[dat$s2a==7 | dat$s2a==8 | dat$s2a==9]<-"State-owned"
dat$before1988<-ifelse(dat$s1a<1988, 1,0)
dat$private<-ifelse(dat$s2b==1,1,0)
dat$s4a<-factor(dat$s4a)
dat$s4b<-factor(dat$s4b) #2-49 is coded 1; 50-249 is coded 2; 250-9999 is coded 3
dat$size[dat$s4b==1]<-"under50emp"
dat$size[dat$s4b==2]<-"50to249emp"
dat$size[dat$s4b==3]<-"over249emp"

dat$s3<-factor(dat$s3)#now 7 categories are 
dat$sector[dat$s3=="Manufacturing" | dat$s3=="Construction"]<-"Manufacturing"
dat$sector[dat$s3=="Hotels and Restaurant" | dat$s3=="Other services" | dat$s3=="Transport" | dat$s3=="Trade" | dat$s3=="Real estate"]<-"Services"
table(dat$sector)
dat$sector7<-factor(dat$s3)

#####################################
######## MAIN DV: OBSTACLES #########
#####################################

dat$JudObst<-ifelse(dat$q54p ==4 | dat$q54p ==3,1,0 ) #functioning of the judiciary q54p
dat$CorObst<-ifelse(dat$q54q ==4 | dat$q54q ==3,1,0) #corruption
dat$CrimOObst<-ifelse(dat$q54s ==4 | dat$q54s ==3,1,0) #organized crime/mafia q54s
dat$TaxAObst<-ifelse(dat$q54i ==4 | dat$q54i ==3,1,0) #tax administration  : q54i

#Mean for all four obstacles 
dat$JudCorMafTax<-apply(dat[,which(names(dat)=="q54p" | names(dat)=="q54q" | names(dat)=="q54s"  |names(dat)=="q54i")], 1, mean,na.rm=TRUE)
summary(dat$JudCorMafTax)

#Confidence in the legal system:
dat$LegConf<-factor(ifelse(dat$q28>=3,1,0))


############################################################
###################### MULTI-LEVEL MODELS  #################
############################################################
judiciary <- glmer(JudObst ~ divMig+ shareResettled+size + sector +city +founded+log(GermDistKM)+(1|citowvil), data= dat[dat$private==1 & dat$before1988==0,], control=glmerControl(optimizer="bobyqa", optCtrl=list(maxfun= 2e5)),family=binomial)
summary(judiciary) 

#Tax administration
taxadmin <- glmer(TaxAObst ~ divMig+ shareResettled+size + sector + founded +city+ log(GermDistKM)+(1|citowvil), data= dat[dat$private==1 & dat$before1988==0,], control=glmerControl(optimizer="bobyqa", optCtrl=list(maxfun= 2e5)),family=binomial)
summary(taxadmin)   

## Corruption 
corruption <- glmer(CorObst ~ divMig+ shareResettled+size + sector + founded +city+ log(GermDistKM)+(1|citowvil), data= dat[dat$private==1 & dat$before1988==0,], control=glmerControl(optimizer="bobyqa", optCtrl=list(maxfun= 2e5)),family=binomial)
summary(corruption) 

##ORGANIZED CRIME 
orgcrime<- glmer(CrimOObst ~ divMig+ shareResettled+size + sector +city + founded +log(GermDistKM)+ (1|citowvil), data= dat[dat$private==1 & dat$before1988==0,],control=glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=1000000)), family=binomial)
summary(orgcrime) 

##All four obstacles (mean)
allobst <- lmer(JudCorMafTax ~ divMig+ shareResettled +size+ sector +founded+city + log(GermDistKM) +(1|citowvil), data=dat[dat$private==1 & dat$before1988==0,], control=lmerControl(optimizer="bobyqa", optCtrl=list(maxfun=2e5)))
summary(allobst)#


############################################
####### Table A24 in the Appendix:  ########
############################################

stargazer(judiciary, taxadmin, corruption, orgcrime, allobst, digits=2, style="APSR", dep.var.labels=c("Judiciary", "Tax Admin", "Corruption", "Org. Crime", "All Four"), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)


############################################
####### Figures based on the tables   ######
############################################

##Subset the data to exclude NAs
dat1<-dat[dat$private==1 & dat$before1988==0 & !(is.na(dat$divMig) & !is.na(dat$founded)),]

tmpdat <- dat1[, c("divMig", "shareResettled", "sector","size", "founded", "city", "GermDistKM", "citowvil")]
jvalues <- with(tmpdat, seq(from = min(divMig), to = max(divMig), length.out = 100))

# calculate predicted probabilities and store in a list
pp <- lapply(jvalues, function(j) {
    tmpdat$divMig <- j
    predict(judiciary, newdata = tmpdat, type = "response", allow.new.levels=TRUE)
})

# average marginal predicted probability across a few different values of divMig
sapply(pp[c(1, 20, 40, 60, 80, 100)], mean)

# get the means with lower and upper quartiles
plotdat <- t(sapply(pp, function(x) {
    c(M = mean(x), quantile(x, c(0.25, 0.75)))
}))
plotdat <- as.data.frame(cbind(plotdat, jvalues))
colnames(plotdat) <- c("PredictedProbability", "Lower", "Upper", "migDiv")
head(plotdat)

# plot average marginal predicted probabilities for juiciary: 
ggplot(plotdat, aes(x = migDiv, y = PredictedProbability)) + geom_linerange(aes(ymin = Lower,
    ymax = Upper)) + geom_line(size = 2) + ylim(c(0, 1))+xlab("Migrant Diversity (1948)")+ylab("Judiciary as Obstacle (2005)")+theme(axis.text.x = element_text(size=13, colour="grey20"), axis.text.y = element_text(size=13, colour="grey20"), axis.title.x=element_text(size=14), axis.title.y=element_text(size=14))


### TAX ADMINISTRATION :   
pp <- lapply(jvalues, function(j) {
    tmpdat$divMig <- j
    predict(taxadmin, newdata = tmpdat, type = "response", allow.new.levels=TRUE)
})
sapply(pp[c(1, 20, 40, 60, 80, 100)], mean)

plotdat <- t(sapply(pp, function(x) {
    c(M = mean(x), quantile(x, c(0.25, 0.75)))
}))
plotdat <- as.data.frame(cbind(plotdat, jvalues))
colnames(plotdat) <- c("PredictedProbability", "Lower", "Upper", "migDiv")
head(plotdat)
ggplot(plotdat, aes(x = migDiv, y = PredictedProbability)) + geom_linerange(aes(ymin = Lower,
    ymax = Upper)) + geom_line(size = 2) + ylim(c(0, 1))+xlab("Migrant Diversity (1948)")+ylab("Tax Administration as Obstacle (2005)")+theme(axis.text.x = element_text(size=13, colour="grey20"), axis.text.y = element_text(size=13, colour="grey20"), axis.title.x=element_text(size=14), axis.title.y=element_text(size=14))

#Corruption: 

pp <- lapply(jvalues, function(j) {
    tmpdat$divMig <- j
    predict(corruption, newdata = tmpdat, type = "response", allow.new.levels=TRUE)
})
sapply(pp[c(1, 20, 40, 60, 80, 100)], mean)

plotdat <- t(sapply(pp, function(x) {
    c(M = mean(x), quantile(x, c(0.25, 0.75)))
}))
plotdat <- as.data.frame(cbind(plotdat, jvalues))
colnames(plotdat) <- c("PredictedProbability", "Lower", "Upper", "migDiv")
head(plotdat)
ggplot(plotdat, aes(x = migDiv, y = PredictedProbability)) + geom_linerange(aes(ymin = Lower,
    ymax = Upper)) + geom_line(size = 2) + ylim(c(0, 1))+xlab("Migrant Diversity (1948)")+ylab("Corruption as Obstacle (2005)")+theme(axis.text.x = element_text(size=13, colour="grey20"), axis.text.y = element_text(size=13, colour="grey20"), axis.title.x=element_text(size=14), axis.title.y=element_text(size=14))


######## ORGANIZED CRIME 
pp <- lapply(jvalues, function(j) {
    tmpdat$divMig <- j
    predict(orgcrime, newdata = tmpdat, type = "response", allow.new.levels=TRUE)
})
 # average marginal predicted probability across a few different values of MigDiv48
sapply(pp[c(1, 20, 40, 60, 80, 100)], mean)

plotdat <- t(sapply(pp, function(x) {
    c(M = mean(x), quantile(x, c(0.25, 0.75)))
}))
plotdat <- as.data.frame(cbind(plotdat, jvalues))
colnames(plotdat) <- c("PredictedProbability", "Lower", "Upper", "migDiv")
head(plotdat)
ggplot(plotdat, aes(x = migDiv, y = PredictedProbability)) + geom_linerange(aes(ymin = Lower,
    ymax = Upper)) + geom_line(size = 2) + ylim(c(0, 1))+xlab("Migrant Diversity (1948)")+ylab("Organized Crime as Obstacle (2005)")+theme(axis.text.x = element_text(size=13, colour="grey20"), axis.text.y = element_text(size=13, colour="grey20"), axis.title.x=element_text(size=14), axis.title.y=element_text(size=14))

################################################################
################  PLACEBOS for the appendix ####################
################################################################

dat$TaxObst<-ifelse(dat$q54h ==4 | dat$q54h ==3,1,0 ) #tax rates 
dat$ContrVObst<-ifelse(dat$q54u ==4 | dat$q54u ==3,1,0 )#Contract violations by customers or suppliers
dat$transport<-ifelse(dat$q54e ==4 | dat$q54e ==3,1,0 ) #transport
dat$FinObst<-ifelse(dat$q54a ==4 | dat$q54a ==3,1,0 ) #Access to financing
dat$RegObst<-ifelse(dat$q54j==4 | dat$q54j==3,1,0 ) #customs and trade regulations  q54j

#Tax rates: 
taxrates<-glmer(TaxObst ~ divMig+ shareResettled +size+ sector+ founded +city + log(GermDistKM) +(1|citowvil), data= dat[dat$private==1 & dat$before1988==0,], control=glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=2e5)),family=binomial)
summary(taxrates) 

#Contract violations by customers or suppliers q54u
contrviol<-glmer(ContrVObst ~ divMig+ shareResettled +size+ sector+ founded +city + log(GermDistKM) +(1|citowvil), data= dat[dat$private==1 & dat$before1988==0,], control=glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=2e5)),family=binomial)
summary(contrviol)   

#Access to financing
finance <- glmer(FinObst ~ divMig+ shareResettled +size+ sector+ founded +city + log(GermDistKM) +(1|citowvil), data= dat[dat$private==1 & dat$before1988==0,], control=glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=2e5)),family=binomial)
summary(finance) 

#customs and trade regulations  q54j
customs <- glmer(RegObst ~ divMig+ shareResettled +size+ sector+ founded +city + log(GermDistKM) +(1|citowvil), data= dat[dat$private==1 & dat$before1988==0,], control=glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=2e5)),family=binomial)
summary(customs) 

stargazer(taxrates, contrviol, finance, customs, style="APSR", digits=2, dep.var.labels=c("Tax rates", "Contract violations",  "Financing", "Customs"), star.cutoffs=c(0.1, 0.05, 0.01), star.char=c("+", "*", "**"), notes = c("+ p<0.1; * p<0.05; ** p<0.01"), notes.append = F)



